In [1]:
import numpy as np
import math
import os
import time
In [2]:
from sklearn import mixture
In [3]:
# import custom functions
import sys
# path to libraries
# currently in ../scripts-lib/
tool_path = os.path.abspath('../scripts-lib')
if tool_path not in sys.path:
    sys.path.append(tool_path)
import lib_phones as lph
# print the loaded functions
print dir(lph)[5:]
In [4]:
# load phone list
phone_path = os.path.abspath('../datasets/TIMIT-MFCCs/TIMIT_phone_list.txt')
phone_list = lph.load_phone_file(phone_path)
print len(phone_list), phone_list
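(For reference, lph.load_phone_file presumably just reads one TIMIT phone symbol per line; the sketch below is a hypothetical stand-in written on that assumption, not the actual lib_phones code.)
In [ ]:
# hypothetical stand-in for lph.load_phone_file, assuming the phone
# list file holds one TIMIT phone symbol per line
def load_phone_file_sketch(path):
    with open(path) as f:
        # strip whitespace and drop empty lines
        return [line.strip() for line in f if line.strip()]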
In [5]:
# load MFCCs into sklearn observations; each frame is one observation
train_TIMIT_dir = os.path.abspath('../datasets/TIMIT-MFCCs/dev')
train_obs = []
train_obs_labels = []
# walk the directories
for (path, dirs, files) in os.walk(train_TIMIT_dir):
    print "working in path: " + path
    for file in files:
        # skip the SA files
        # (dev note: to restrict to a single file, add '"si1573" in file and' to the condition)
        if ".mfcc" in file and "sa" not in file:
            # check that the corresponding .phn file exists
            if not os.path.exists(path + "/" + file[:-8] + "phn"):
                print path + "/" + file[:-8] + "phn"
                print "corresponding .phn file does not exist!"
            else:
                print "working on: " + file
                # print "from path : " + path
                # open the files
                mfcc_file = open(path + "/" + file)
                phn_file = open(path + "/" + file[:-8] + "phn")
                # extract phone times
                phone_times = []
                for phn_line in phn_file:
                    phone_times.append(phn_line.split())
                # transpose for easier use
                phone_times = map(list, zip(*phone_times))
                # skip the mfcc_file header
                next(mfcc_file)
                # reset frame count
                frame_cnt = 0
                # for each line of mfcc_file
                for mfcc_line in mfcc_file:
                    # increment frame count
                    frame_cnt += 1
                    # print "frame line #:", frame_cnt
                    # frame start time in seconds
                    start_t = mfcc_line.split(";")[1]
                    # create frame (skipping the first 2 values: frame_index and frame_time)
                    frame = map(float, mfcc_line.split(";")[2:])
                    # print np.shape(frame)
                    # print frame
                    # find the corresponding phoneme's index in the phone list
                    phn_index = lph.find_phone_index(start_t, phone_times, phone_list)
                    # add to the observations
                    train_obs.append(frame)
                    train_obs_labels.append(phone_list[phn_index])
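The alignment helper is the one piece of lib_phones this loop leans on. Presumably it locates the .phn segment whose interval contains the frame start time and returns that phone's index in phone_list. Below is a hypothetical version, assuming the .phn times are 16 kHz sample indices and start_t is in seconds; the real lib_phones implementation may differ.
In [ ]:
# hypothetical stand-in for lph.find_phone_index; phone_times is the
# transposed .phn data: [start_samples, end_samples, phone_symbols]
def find_phone_index_sketch(start_t, phone_times, phone_list):
    sample = float(start_t) * 16000  # assumption: TIMIT audio is 16 kHz
    starts, ends, phones = phone_times
    for i in range(len(phones)):
        if float(starts[i]) <= sample < float(ends[i]):
            return phone_list.index(phones[i])
    # frame starts after the last labelled segment: fall back to the last phone
    return phone_list.index(phones[-1])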
In [6]:
print np.shape(train_obs)
print np.shape(train_obs_labels)
In [7]:
print train_obs[0]
print train_obs_labels[0:100]
In [8]:
# create the GMM
num_components = 10
num_iter = 2
g = mixture.GMM(n_components=num_components, n_iter=num_iter)
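Aside: mixture.GMM was deprecated in scikit-learn 0.18 and removed in 0.20. On a newer install the equivalent cell would use GaussianMixture, where n_iter has become max_iter (g_modern is just an illustrative name):
In [ ]:
# equivalent construction on scikit-learn >= 0.18, where GMM is deprecated
from sklearn.mixture import GaussianMixture
g_modern = GaussianMixture(n_components=num_components, max_iter=num_iter)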
In [9]:
# fit the GMM to the observations
g.fit(train_obs)
Out[9]:
In [10]:
# pred = g.predict(train_obs)
# print train_obs_labels
# print pred
In [11]:
print np.round(g.score(train_obs).mean(), 2)
print np.round(g.score(train_obs).var(), 2)
print np.round(g.score(train_obs)[0:5])
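Note that the old GMM.score returns one log-likelihood per frame, which is why .mean() and .var() are meaningful above. With GaussianMixture the per-frame values come from score_samples, while score itself already returns the mean; a sketch using the illustrative g_modern from above:
In [ ]:
# newer-API equivalents (would require g_modern.fit(train_obs) first):
# g_modern.score_samples(train_obs)  # array, one log-likelihood per frame
# g_modern.score(train_obs)          # scalar: mean log-likelihood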
In [12]:
# refit, timing it
t0 = time.time()
g.fit(train_obs)
t1 = time.time()
print num_components, "components,", num_iter, "iterations in", t1 - t0, "seconds, for", len(train_obs_labels), "observations."
In [13]:
print np.round(g.score(train_obs).mean(), 2)
print np.round(g.score(train_obs).var(), 2)
print np.round(g.score(train_obs)[0:5])
Refitting works as expected (it does not reset the GMM).
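A quick way to check that claim directly (not part of the original run) is to copy the component means and see how far an extra refit moves them; a warm start should leave them close, while a full re-initialisation generally will not:
In [ ]:
# sanity check: compare component means across an extra refit
means_before = g.means_.copy()
g.fit(train_obs)
print np.round(np.abs(g.means_ - means_before).max(), 4)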
In [14]:
# save the GMM in pickled form
import cPickle as pickle
# name and location to save in
pickle_name = "TIMIT_ubm_gmm_" + str(num_components) + ".pckl"
pickle_dir = os.path.abspath('../datasets/TIMIT Pickled Data')
if not os.path.isdir(pickle_dir):
os.makedirs(pickle_dir)
pickle.dump(g, open(pickle_dir + os.sep + pickle_name, "wb"))
print "saved gmm in", pickle_dir + os.sep + pickle_name
In [15]:
# reload pickled file for testing
g2 = pickle.load(open(pickle_dir + os.sep + pickle_name, "rb"))
print "loaded gmm from", pickle_dir + os.sep + pickle_name
In [ ]: